In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
# Notebook-wide settings: a scratch directory and square 10x10 default figures.
path = '/tmp/'
plt.rcParams['figure.figsize'] = (10, 10)
In [2]:
import os

# Restrict TensorFlow to a single GPU. The variables are set before any device
# is queried below, so the listing already reflects the restriction.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # number GPUs by PCI bus position
    "CUDA_VISIBLE_DEVICES": "2",         # expose only the device with id 2
})

from tensorflow.python.client import device_lib

# Show which devices TensorFlow can see.
print(device_lib.list_local_devices())
In [3]:
# Generate seasonal data dataframe
# The synthetic daily series is the sum of three parts:
#   t     - a slow linear trend (+0.01 per day)
#   s     - a weekly "V" pattern taking the values 7, 5, 3, 1, 1, 3, 5
#   noise - unit-variance Gaussian noise
n_cases = 365*10
random_seed = 123  # fixed seed so Restart & Run All reproduces the same series
day = np.arange(n_cases)
t = day / 100.
s = np.abs((day % 7) - 3.5) * 2.
# A private RandomState keeps the global numpy RNG untouched.
noise = np.random.RandomState(random_seed).randn(n_cases)
data = t + s + noise
index = pd.date_range('1/1/2000', periods=n_cases)
serie = pd.Series(data=data, index=index, name='value')
# Call head() (the original displayed the bound method, not the rows).
serie.head()
Out[3]:
In [4]:
# Overview of the whole series; labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.plot(serie)
ax.set(title='Synthetic daily series', xlabel='date', ylabel='value')
plt.show()
Out[4]:
In [5]:
# Zoom on 1 Jan 2002 - 1 Feb 2002 (both ends included) to show the weekly pattern.
# ISO dates with .loc make the label-based slice explicit and unambiguous.
fig, ax = plt.subplots()
ax.plot(serie.loc['2002-01-01':'2002-02-01'])
ax.set(title='Synthetic daily series, January 2002', xlabel='date', ylabel='value')
plt.show()
Out[5]:
In [ ]:
In [6]:
# Separate train test and normalize
test_cases = 300        # number of trailing observations held out for testing
lenght_sequence = 140   # length of the input window fed to the network

series_values = serie.values
data_trn = series_values[:-test_cases]
# The test slice also keeps the lenght_sequence points that precede the
# held-out period, so input windows can reach back before the first test case.
data_tst = series_values[-(test_cases + lenght_sequence):]
print(data_trn.shape, data_tst.shape)

# Normalize
# Both parts are divided by the range of the TRAINING data and shifted by -0.5.
# Note that the minimum is not subtracted, so the result only lies in
# [-0.5, 0.5] when the training minimum is close to zero.
range_trn = data_trn.max() - data_trn.min()
print('Range: ', range_trn)
data_trn, data_tst = (part / range_trn - 0.5 for part in (data_trn, data_tst))
In [7]:
# Batch generator with sequences extractor
def generate_batch(serie, batch_size=4, x_size=35, y_size=14, shuffle_data=True):
    '''
    Generate batches of (input window, target window) pairs for one epoch.

    Every window start position of the series is used at most once per
    epoch; the order of the starts is randomized when shuffle_data is True.

    Parameters:
        serie: 1-D sequence of scalars or 2-D array [time, features].
        batch_size: number of windows per batch (must be >= 1).
        x_size: length of the input window.
        y_size: length of the target window that directly follows the input.
        shuffle_data: shuffle the window start positions with random.shuffle.

    Yields:
        (X_batch, y_batch) float arrays of shape [batch_size, x_size, features]
        and [batch_size, y_size, features]. Only full batches are produced;
        leftover windows that do not fill a batch are skipped.

    Raises:
        ValueError: if batch_size is smaller than 1 (raised on first iteration).
    '''
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    serie = np.array(serie)  # Convert to array
    if len(serie.shape) == 1:  # if scalar serie convert to vector serie
        serie = np.reshape(serie, [serie.shape[0], 1])
    n_features = serie.shape[1]

    # A window starting at s spans serie[s : s + x_size + y_size], so the last
    # valid start is len - x_size - y_size and there is one more start than that.
    n_starts = serie.shape[0] - x_size - y_size + 1
    start_index = list(range(n_starts))
    if shuffle_data:
        from random import shuffle
        shuffle(start_index)

    # The "+ 1" keeps the final batch when n_starts is a multiple of batch_size.
    for first in range(0, n_starts - batch_size + 1, batch_size):
        X_batch = np.empty([batch_size, x_size, n_features])
        y_batch = np.empty([batch_size, y_size, n_features])
        for row, start in enumerate(start_index[first:first + batch_size]):
            split = start + x_size  # boundary between input and target
            X_batch[row] = serie[start:split, :]
            y_batch[row] = serie[split:split + y_size, :]
        yield X_batch, y_batch
# Quick check of the generator: pull one batch of 32 windows from the training
# data (default window sizes) and show the input and target shapes.
gb = generate_batch(data_trn, batch_size=32)
X_b, y_b = next(gb)
print(X_b.shape, y_b.shape, sep='\n')
In [ ]:
In [8]:
# Session shared by the rest of the notebook.
# allow_growth lets GPU memory be claimed on demand, and
# log_device_placement logs the device each op is assigned to.
gpu_options = tf.GPUOptions(allow_growth=True)
session_config = tf.ConfigProto(
    gpu_options=gpu_options,
    log_device_placement=True,
)
# An InteractiveSession installs itself as the default session, which is what
# lets later cells call .eval() on tensors without passing a session.
sess = tf.InteractiveSession(config=session_config)
In [9]:
def dense(x, input_size=10, otput_size=1):
    """Fully connected linear layer without activation: matmul(x, W) + b.

    Creates two new variables on every call:
      * W, shape [input_size, otput_size], drawn from a truncated normal
        with standard deviation 0.1;
      * b, shape [otput_size], filled with the constant 0.1.

    Args:
        x: tensor that can be matrix-multiplied with W.
        input_size: number of rows of W.
        otput_size: number of output units (parameter name kept as spelled,
            since callers pass it by keyword).

    Returns:
        The tensor matmul(x, W) + b.
    """
    weight_shape = [input_size, otput_size]
    weights = tf.Variable(tf.truncated_normal(weight_shape, stddev=0.1))
    biases = tf.Variable(tf.constant(0.1, shape=[otput_size]))
    projected = tf.matmul(x, weights)
    return projected + biases
In [10]:
# Build the forecasting graph: one LSTM layer over the input window followed by
# a linear layer that emits all forecast steps at once.

# Parameters
lstm_feat = 256  # number of units in the LSTM cell
# Forecast horizon. y batches are reshaped to [batch, maxlen_y] before being
# fed, so this has to equal the y_size used by generate_batch (default 14).
maxlen_y = 14
# Inputs
# x: [batch, lenght_sequence, 1] - windows of a single-feature series.
x_input = tf.placeholder(tf.float32, shape=[None, lenght_sequence, 1], name='x')
# y: [batch, maxlen_y] - the values that follow each input window.
y_input = tf.placeholder(tf.float32, shape=[None, maxlen_y], name='y')
# Keep probability for the dropout applied to the LSTM outputs (fed per run).
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
# LSTM cell with a seeded uniform initializer in [-0.1, 0.1], then wrapped so
# that dropout is applied to its outputs.
lstm1 = tf.contrib.rnn.LSTMCell(lstm_feat, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
lstm1 = tf.contrib.rnn.DropoutWrapper(lstm1, output_keep_prob=keep_prob)
# Unroll the cell over the window; the final state is discarded.
lstm_out, _ = tf.nn.dynamic_rnn(lstm1, x_input, dtype=tf.float32, scope='lstm01')
# Final dense layer
# Only the output at the last position of the window is projected to the
# maxlen_y forecast values.
y_pred = dense(lstm_out[:,-1,:], input_size=lstm_feat, otput_size=maxlen_y)
print(y_pred)
# Loss function
# Sum (not mean) of squared errors over the batch and all forecast steps, so
# its magnitude grows with the batch size.
cost = tf.reduce_sum(tf.square(y_pred - y_input))
# Trainer
# The learning rate is a placeholder so it can be changed between steps
# without rebuilding the graph.
learning_rate = tf.placeholder(tf.float32, name='learning_rate')
train_step = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
In [ ]:
In [11]:
# Create test data
# A single batch of 256 windows taken from the test slice, with inputs of
# lenght_sequence steps and the generator's default target length. The
# generator shuffles by default, so the windows come in random order.
# The batch size 256 is relied upon by later reshapes of y_tst.
gb = generate_batch(data_tst, batch_size=256, x_size=lenght_sequence)
X_tst, y_tst = next(gb)
print(X_tst.shape, y_tst.shape)
In [12]:
# Give every variable of the graph its initial value before training starts.
init_op = tf.global_variables_initializer()
sess.run(init_op)
In [13]:
# Train graph
# NOTE(review): this export lost the notebook's indentation, so the nesting of
# the statements after the inner `for` cannot be read from the text. In
# particular, confirm against the original notebook whether `i += 1` and the
# `if i%10==0` report run once per batch or once per epoch.
num_epoch=400  # number of passes over the training windows
batchSize=256  # windows per training batch; also used to reshape y_b below
i=0  # progress counter; a report is printed when it is a multiple of 10
lr=0.001  # value fed to the learning_rate placeholder
for epoch in range(num_epoch):
c_trn = []  # training costs collected during this epoch
# A fresh generator per epoch (shuffled by default in generate_batch).
gb = generate_batch(data_trn, x_size=lenght_sequence, batch_size=batchSize)
for x_b, y_b in gb:
# y_b arrives as [batch, steps, 1] and is flattened to [batchSize, maxlen_y]
# to match y_input; keep_prob 0.9 enables dropout on the LSTM outputs.
feed_dict={x_input: x_b, y_input: np.reshape(y_b, [batchSize,maxlen_y]), learning_rate: lr, keep_prob: 0.9}
# One optimizer step; c is the summed squared error of this batch.
_, c = sess.run([train_step, cost], feed_dict=feed_dict)
c_trn += [c]
i += 1
if i%10==0:
# Test cost with dropout disabled (keep_prob 1). The literal 256 must match
# the batch_size used when X_tst / y_tst were created.
c_tst = cost.eval(feed_dict={x_input: X_tst, y_input: np.reshape(y_tst, [256, maxlen_y]), keep_prob: 1})
# Reports the mean of the training costs collected so far in c_trn.
print('Epoch: ', epoch, ' - LR: ',lr, ' - Cost: ',np.mean(c_trn, axis=0), ' - Cost test: ',c_tst )
# Optional learning-rate decay, currently disabled.
#lr *= 0.99
In [14]:
#Score next 14 days
i = 3  # position of the test window to inspect
# Slice instead of index so the sample keeps its leading batch axis.
sample = X_tst[i:i + 1]
p_tst = y_pred.eval(feed_dict={x_input: sample, keep_prob: 1})  # dropout off
print(p_tst)
print(y_tst[i])

# Plot for 1 step forecast
# First curve is the prediction, second curve the real values.
for curve in (p_tst[0], y_tst[i]):
    plt.plot(curve)
plt.show()
In [15]:
# Compare prediction and real values for the first 24 test windows.
n_plots = 24
# One batched forward pass (dropout off) instead of one session run per panel.
preds = y_pred.eval(feed_dict={x_input: X_tst[:n_plots], keep_prob: 1})

fig, axes = plt.subplots(8, 3)
for i, ax in enumerate(axes.flat):
    p_tst = preds[i:i + 1]
    # Column 0 is the prediction and column 1 the real values, despite the name.
    real_vs_pred = np.array([p_tst[0], y_tst[i][:, 0]]).T
    ax.plot(real_vs_pred)
# A single legend is enough: every panel uses the same two curves.
axes.flat[0].legend(['predicted', 'real'], loc='best')
fig.suptitle('Forecast vs. real values for %d test windows' % n_plots)
plt.show()